{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Formatting the trainset output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def modify_json_file(input_file_path, output_file_path=None):\n",
    "    # If output_file_path is not provided, generate one\n",
    "    if output_file_path is None:\n",
    "        file_name, file_extension = os.path.splitext(input_file_path)\n",
    "        output_file_path = f\"{file_name}_modified_new{file_extension}\"\n",
    "\n",
    "    # Read the JSON file\n",
    "    with open(input_file_path, 'r') as file:\n",
    "        data = json.load(file)\n",
    "    \n",
    "    # Check if the loaded data is a list of dictionaries\n",
    "    if isinstance(data, list):\n",
    "        for item in data:\n",
    "            if isinstance(item, dict) and 'steps' in item:\n",
    "                # Modify the first step in each dictionary\n",
    "                if item['steps'] and isinstance(item['steps'][0], str) and item['steps'][0].startswith('search['):\n",
    "                    item['steps'][0] = 'I should first generate a search action based on the instruction'\n",
    "    \n",
    "    # If the loaded data is a single dictionary\n",
    "    elif isinstance(data, dict) and 'steps' in data:\n",
    "        if data['steps'] and isinstance(data['steps'][0], str) and data['steps'][0].startswith('search['):\n",
    "            data['steps'][0] = 'I should first generate a search action based on the instruction'\n",
    "    \n",
    "    # Write the modified data to the new file\n",
    "    with open(output_file_path, 'w') as file:\n",
    "        json.dump(data, file, indent=2)\n",
    "    \n",
    "    print(f\"Modified data has been saved to: {output_file_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modify_json_file(\"json file from trainset\") # change this to the json file from the webshop_trainset_building.py output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('the modified data','r') as f:  # change this to the modified data file from last step\n",
    "    data_test = json.load(f)\n",
    "\n",
    "\n",
    "\n",
    "def transform_dict_to_list(input_dict):\n",
    "    \"\"\"\n",
    "    Coverting the format to the training format\n",
    "    \"\"\"\n",
    "    result = []\n",
    "    problem_text = input_dict['problem']\n",
    "    steps = input_dict['steps']\n",
    "    models = input_dict['models']\n",
    "\n",
    "    # Create the allSubtask string\n",
    "    all_subtask = \"; \".join([f\"step{i+1}: {step}\" for i, step in enumerate(steps)])\n",
    "\n",
    "    for i, (step, model) in enumerate(zip(steps, models)):\n",
    "        new_dict = {\n",
    "            \"problemText\": problem_text,\n",
    "            \"allSubtask\": all_subtask,\n",
    "            \"nowSubtask\": f\"step{i+1}: {step}\",\n",
    "            \"difficultyNum\": 0 if 'llama3' in model else 1\n",
    "        }\n",
    "        result.append(new_dict)\n",
    "\n",
    "    return result\n",
    "\n",
    "# transform every dictionary in the list to a list of dictionaries\n",
    "transformed_data = []\n",
    "for item in data_test:\n",
    "    transformed_data.extend(transform_dict_to_list(item))\n",
    "\n",
    "# Save the transformed data to a new file\n",
    "output_file_path = 'transformed_data.json'\n",
    "with open(output_file_path, 'w') as file:\n",
    "    json.dump(transformed_data, file, indent=2)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Converting the model allocation result to DOT input format\n",
    "This will modify the test set to the reallocated model selections."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the test set file \n",
    "with open('webshop_succeed_finetune_final.json', 'r') as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "# Load the reallocated file\n",
    "with open('Webshop_DOT_Allocation.json', 'r') as f:\n",
    "    reallocated_data = json.load(f)\n",
    "\n",
    "# Create a mapping of steps to models\n",
    "step_to_model = {}\n",
    "for item in reallocated_data:\n",
    "    problem = item['problemText']\n",
    "    step = item['nowSubtask'].split(': ', 1)[1]\n",
    "    model = 'llama3-8b-8192' if item['difficultyNum'] == 0 else 'gpt-4o'\n",
    "    \n",
    "    if problem not in step_to_model:\n",
    "        step_to_model[problem] = {}\n",
    "    step_to_model[problem][step] = model\n",
    "\n",
    "# Update the models in the first file\n",
    "for item in data:\n",
    "    problem = item['problem']\n",
    "    if problem in step_to_model:\n",
    "        new_models = []\n",
    "        for step, old_model in zip(item['steps'], item['models']):\n",
    "            new_model = step_to_model[problem].get(step, old_model)\n",
    "            new_models.append(new_model)\n",
    "        item['models'] = new_models\n",
    "\n",
    "# Save the updated data to a new JSON file\n",
    "with open('updated_first_file.json', 'w') as f:\n",
    "    json.dump(data, f, indent=2)\n",
    "\n",
    "print(\"Updated JSON file has been saved as 'updated_first_file.json'\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
